* Title: 	bea_clean.do
* Version: 	31 May 2018
* Purpose: 	Clean state GDP data from BEA

*******************************************************************************
* (0) Start of file
*******************************************************************************

* Close any log that is still open (-capture- swallows the error raised
* when none is), then start a fresh log for this run.
cap log close
log using "log/bea_clean", replace

* Begin from an empty session with no leftover macros.
clear all
macro drop _all

* Never stop at -more- prompts; allow -pause- commands to take effect.
set more off
pause on

*******************************************************************************
* (1) Load and clean spreadsheet for GDP price deflator
*******************************************************************************
* Read the national deflator sheet and keep two kinds of rows: the header
* row (A == "Line" with a non-empty cell in column C), which supplies the
* year for each data column, and the data row numbered 1.
import excel using "dta/raw/bea_raw.xlsx", sheet("Deflator_National") clear
keep if A == "1" | (A == "Line" & C != "")
destring _all, replace

* Rename every data column to GDPDeflator_Nat<year>, reading the year from
* that column's header cell via r(mean) over the single header row.
foreach col of varlist C-CM {
	summarize `col' if A == "Line"
	local yr = r(mean)
	rename `col' GDPDeflator_Nat`yr'
}

* The header row has done its job; reshape the remaining data row into one
* observation per year.
drop if A != "1"
keep A GDPDeflator_Nat*
reshape long GDPDeflator_Nat, i(A) j(year)
drop A
sort year

* Park the year-level deflator in a temporary file for the later merge.
tempfile deflator
save "`deflator'"


*******************************************************************************
* (2) Load and clean spreadsheets for state GDP
*******************************************************************************

* Clean the two state GDP worksheets with identical steps. Per sheet the
* settings that differ are the worksheet name, the last year column, and
* the stub given to the renamed year columns.
foreach src in naics sic {
	if "`src'" == "naics" {
		local sheet   State_NAICS
		local lastcol W
		local stub    GDP_Real
	}
	else {
		local sheet   State_SIC
		local lastcol AK
		local stub    GDP_Nom
	}

	* Drop rows with nothing in column C, then convert whatever columns are
	* fully numeric (column A still holds the "Fips" header text).
	import excel using "dta/raw/bea_raw.xlsx", sheet("`sheet'") clear
	drop if mi(C)
	destring _all, replace

	* Rename each year column to <stub><year>, taking the year from the
	* header row (the row where A == "Fips").
	foreach col of varlist C-`lastcol' {
		summarize `col' if A == "Fips"
		local yr = r(mean)
		rename `col' `stub'`yr'
	}

	* With the header row gone, a second destring can convert column A.
	drop if A == "Fips"
	destring _all, replace

	* Keep only states (drop U.S. total and regions): after dividing the
	* code by 1000, anything outside 1-56 is discarded.
	rename (A B) (statefip StateName)
	replace statefip = statefip/1000
	keep if inrange(statefip, 1, 56)

	* Save under a tempfile macro named after the source (`naics' / `sic').
	tempfile `src'
	save "``src''"
}


*******************************************************************************
* (3) Construct consistent state-level GDP series
*******************************************************************************

* For SIC data, convert to 2009 dollars using national GDP deflator
* For SIC data, convert to 2009 dollars using national GDP deflator.
* The merge asserts that every SIC year finds a deflator value; deflator
* years with no SIC observation are dropped by keep(matched).
use `sic', clear
reshape long GDP_Nom, i(statefip StateName) j(year)
merge m:1 year using `deflator', assert(matched using) keep(matched) nogen
* Store as double: -generate- defaults to float (about 7 significant
* digits), which would round the deflated levels before they enter the
* scaling regression and get spliced onto the NAICS series.
gen double GDP_Real_Unadj = GDP_Nom / (GDPDeflator_Nat/100)
keep year statefip GDP_Real_Unadj
reshape wide GDP_Real_Unadj, i(statefip) j(year)

* Regress 1997 NAICS GDP on 1997 SIC GDP (no constant) to get the scaling
* factor; 1997 is the year present in both sources. The merge asserts that
* both files cover exactly the same set of states.
merge 1:1 statefip using `naics', assert(matched) nogen
regress GDP_Real1997 GDP_Real_Unadj1997, nocons
local coeff_total = _b[GDP_Real_Unadj1997]

* Rescale 1963-1996 SIC onto the NAICS level (double, for the same reason
* as above), then drop the unadjusted columns.
forval year = 1963/1996 {
	gen double GDP_Real`year' = GDP_Real_Unadj`year' * `coeff_total'
}
drop GDP_Real_Unadj*

* Back to a state-year panel with a single GDP_Real variable.
reshape long GDP_Real, i(statefip StateName) j(year)


*******************************************************************************
* (4) End of file
*******************************************************************************

* Shrink storage types where that is lossless, order the panel by state
* and year, and write the cleaned file.
compress
sort statefip year
save "dta/bea_clean", replace

* Close the log and stop; -clear- lets -exit- proceed without objecting to
* the data still in memory.
log close
exit, clear
